% Estimation of the logp1 with Area dependce models
% covariates prediction
% -To do the unshrunken/shrunken comparison

%% Housekeeping
clc; clear all; close all;
workpath00 = pwd;

addpath('toolbox_xt');
addpath('toolbox_plot');

%% load data
load('data_all.mat');

%% Two panel variables
% xtflag and xtval: combofips
% xtflag2 and xtval2: year
% xtflag3 and xtval3: combofips/year
xtflag3 = xtflag + xtflag2/10000;
xtval3 = unique(xtflag3);

nobs = size(YY,1);
ncity = length(xtval);
nyear = 6;

%% Data for the esitmation
% sel_est = true(nobs,1);  %include all

[~,~,info] = xt_reg(YY, ones(nobs,1), xtflag3, xtval3);
sel_est = (info.n>=5);
% sel_est = true(size(YY,1),1);

est.YY      = YY(sel_est,:);
est.xtflag  = xtflag(sel_est,:);
est.xtval   = unique(est.xtflag);
est.xtflag2 = xtflag2(sel_est,:);
est.xtval2  = unique(est.xtflag2);
est.xtflag3 = xtflag3(sel_est,:);
est.xtval3  = unique(est.xtflag3);
est.nobs    = size(xtflag,1);

% Other controls
YD = [year_dummy2, year_dummy3, year_dummy4, year_dummy5, year_dummy6];

% estimation
est.YD = YD(sel_est,:);
est.XX = XX(sel_est,:);
est.logDC = logDC(sel_est,:);
est.DD = DD(sel_est,:);

est.ZZ = ZZ(sel_est,:);
[~,temp_ZZ2] = xt_reg(est.ZZ(:,2), ones(size(est.ZZ,1),1), est.xtflag, est.xtval);
est.ZZ2 = [ones(size(temp_ZZ2,1),1), temp_ZZ2];

estinfo.islog = 2;
estinfo.FitMethod = 'REML';

%% Preliminary
nobs = size(est.YY,1);
ncity = length(est.xtval);
nyear = 6;

temp_y = est.YY;
temp_yd = est.YD;

temp_XX    = est.XX;
temp_ZZ    = est.ZZ;
temp_ZZ2   = est.ZZ2;
temp_logDC = est.logDC;

islog = estinfo.islog;
if islog == 0
    temp_d = [est.DD];
elseif islog == 1
    temp_d = [log(est.DD)];
elseif islog == 2
    temp_d = [log(1+est.DD)];
end

xtflag = est.xtflag;
xtval  = est.xtval;

xtflag2 = est.xtflag2;
xtval2  = est.xtval2;

xtflag3 = est.xtflag3;
xtval3  = est.xtval3;

ZZ  = est.ZZ;
ZZ2 = est.ZZ2;

%% Actual estimation
% Model 1: covariate depends on distance and its prior depends on area
% Things can be improved
% a) other information such as distance to coast
% b) full covariance of regression error error for X1 and error for X2 for
% example
% c) taking account for prediction uncertainty
Options = [];
Options.Display = 'iter';
Options.TolFun = 1e-12;
% Options = optimoptions('fminunc', 'TolFun', 1e-15, 'TolX', 1e-15, 'MaxFunEvals', 2000);

temp_XXX = [temp_XX, temp_XX(:,14).^2, temp_XX(:,14).^3];

% shrunken
estX_M1 = [];
tractX = [];
for vind = 1:1:size(temp_XXX,2)
    temp_x = [ones(nobs,1) temp_ZZ(:,2), temp_d, temp_d.*temp_ZZ(:,2)];
    temp_y = temp_XXX(:,vind);
    temp_m = fitlmematrix(temp_x,temp_y,{[ones(nobs,1), temp_d]},{xtflag},...
        'CovariancePattern',{'FullCholesky'},...
        'RandomEffectPredictors',{{'ej1','ej2'}},'RandomEffectGroups',{'city'},...
        'OptimizerOptions',Options, ...
        'FitMethod', estinfo.FitMethod, ...
        'Verbose', 0);
    temp_m
    
    % for prediction
    new_nobs = size(tract.combofips,1);
    new_ZZ = zeros(new_nobs,1);
    for i=1:1:size(temp_ZZ2,1)
        new_ZZ(tract.combofips==xtval(i),1) = temp_ZZ2(i,2);
    end
    
    new_x = [ones(new_nobs,1), new_ZZ, log(1+tract.min_dist), new_ZZ.*log(1+tract.min_dist)];
    new_z = {[ones(new_nobs,1), log(1+tract.min_dist)]};
    new_g = tract.combofips;
    
    new_y = predict(temp_m,new_x,new_z,new_g);
    tractX = [tractX,new_y];
    
    eval(['estX_M1.model',num2str(vind),' = temp_m;']);
end
estX_M1.predX = tractX;

% unshrunken M1
estX_M1_unsh = [];
tractX = [];
for vind = 1:1:size(temp_XXX,2)
    
    temp_x = [ones(nobs,1), temp_d];
    temp_y = temp_XXX(:,vind);
    
    % unshrunken estimator
    unsh_bet = [];
    unsh_xtval = [];
    for j=1:1:length(xtval)
        temp_sel = xtflag == xtval(j);
        temp_xx = temp_x(temp_sel,:);
        temp_yy = temp_y(temp_sel,:);
        temp_bet = (temp_xx'*temp_xx)\(temp_xx'*temp_yy);        
        unsh_bet = [unsh_bet; temp_bet'];
        unsh_xtval = [unsh_xtval; xtval(j)];
    end
    
    % prediction
    unsh_yy = ones(size(tract.combofips,1),1)*nan;
    for j=1:1:length(xtval)
        temp_sel = tract.combofips == xtval(j);
        new_dd = log(1+tract.min_dist(temp_sel,:));
        new_xx = [ones(sum(temp_sel),1), new_dd];
        new_yy = new_xx*unsh_bet(j,:)';
        unsh_yy(temp_sel,:) = new_yy;
    end

    % store
    tractX = [tractX, unsh_yy];

    eval(['estX_M1_unsh.bet',num2str(vind),' = unsh_bet;']);
    eval(['estX_M1_unsh.xtval',num2str(vind),' = unsh_xtval;']);
end
estX_M1_unsh.predX = tractX;


%% Actual estimation
% Model 2: covariate depends on distance and its prior depends on area
% - Same as Model 1 but it depends on distance to coast as well
% Things can be improved
% a) other information such as distance to coast
% b) full covariance of regression error error for X1 and error for X2 for
% example
% c) taking account for prediction uncertainty
Options = [];
Options.Display = 'iter';
Options.TolFun = 1e-12;
% Options = optimoptions('fminunc', 'TolFun', 1e-15, 'TolX', 1e-15, 'MaxFunEvals', 2000);

temp_XXX = [temp_XX, temp_XX(:,14).^2, temp_XX(:,14).^3];

estX_M2 = [];
tractX = [];
for vind = 1:1:size(temp_XXX,2)
    temp_x = [ones(nobs,1) temp_ZZ(:,2), temp_d, temp_d.*temp_ZZ(:,2), temp_logDC, temp_logDC.*temp_ZZ(:,2)];
    temp_y = temp_XXX(:,vind);
    temp_m = fitlmematrix(temp_x,temp_y,{[ones(nobs,1), temp_d, temp_logDC]},{xtflag},...
        'CovariancePattern',{'FullCholesky'},...
        'RandomEffectPredictors',{{'ej1','ej2','ej3'}},'RandomEffectGroups',{'city'},...
        'OptimizerOptions',Options, ...
        'FitMethod', estinfo.FitMethod, ...
        'Verbose', 0);
    temp_m
    
    % for prediction
    new_nobs = size(tract.combofips,1);
    new_ZZ = zeros(new_nobs,1);
    for i=1:1:size(temp_ZZ2,1)
        new_ZZ(tract.combofips==xtval(i),1) = temp_ZZ2(i,2);
    end
    
    new_x = [ones(new_nobs,1), new_ZZ, ...
        log(1+tract.min_dist), new_ZZ.*log(1+tract.min_dist), ...
        log(tract.d2c), new_ZZ.*log(tract.d2c), ...
        ];
    new_z = {[ones(new_nobs,1), log(1+tract.min_dist), log(tract.d2c)]};
    new_g = tract.combofips;
    
    new_y = predict(temp_m,new_x,new_z,new_g);
    tractX = [tractX,new_y];
    
    eval(['estX_M2.model',num2str(vind),' = temp_m;']);
end
estX_M2.predX = tractX;

% unshrunken M2
estX_M2_unsh = [];
tractX = [];
for vind = 1:1:size(temp_XXX,2)
    
    temp_x = [ones(nobs,1), temp_d, temp_logDC];
    temp_y = temp_XXX(:,vind);
    
    % unshrunken estimator
    unsh_bet = [];
    unsh_xtval = [];
    for j=1:1:length(xtval)
        temp_sel = xtflag == xtval(j);
        temp_xx = temp_x(temp_sel,:);
        temp_yy = temp_y(temp_sel,:);
        temp_bet = (temp_xx'*temp_xx)\(temp_xx'*temp_yy);        
        unsh_bet = [unsh_bet; temp_bet'];
        unsh_xtval = [unsh_xtval; xtval(j)];
    end
    
    % prediction
    unsh_yy = ones(size(tract.combofips,1),1)*nan;
    for j=1:1:length(xtval)
        temp_sel = tract.combofips == xtval(j);
        new_dd = log(1+tract.min_dist(temp_sel,:));
        new_d2c = log(tract.d2c(temp_sel,:));
        new_xx = [ones(sum(temp_sel),1), new_dd, new_d2c];
        new_yy = new_xx*unsh_bet(j,:)';
        unsh_yy(temp_sel,:) = new_yy;
    end

    % store
    tractX = [tractX, unsh_yy];

    eval(['estX_M2_unsh.bet',num2str(vind),' = unsh_bet;']);
    eval(['estX_M2_unsh.xtval',num2str(vind),' = unsh_xtval;']);
end
estX_M2_unsh.predX = tractX;

%% Actual estimation
% Model 3: covariate depends on distance and its prior depends on area
% - Same as Model 1 but it depends on distance to coast as well
% - Unlike Model 2, the coefficient on the coast variable does not vary by
% metro area.
% Things can be improved
% a) other information such as distance to coast
% b) full covariance of regression error error for X1 and error for X2 for
% example
% c) taking account for prediction uncertainty
Options = [];
Options.Display = 'iter';
Options.TolFun = 1e-12;
% Options = optimoptions('fminunc', 'TolFun', 1e-15, 'TolX', 1e-15, 'MaxFunEvals', 2000);

temp_XXX = [temp_XX, temp_XX(:,14).^2, temp_XX(:,14).^3];

estX_M3 = [];
tractX = [];
for vind = 1:1:size(temp_XXX,2)
    temp_x = [ones(nobs,1) temp_ZZ(:,2), temp_d, temp_d.*temp_ZZ(:,2), temp_logDC];
    temp_y = temp_XXX(:,vind);
    temp_m = fitlmematrix(temp_x,temp_y,{[ones(nobs,1), temp_d]},{xtflag},...
        'CovariancePattern',{'FullCholesky'},...
        'RandomEffectPredictors',{{'ej1','ej2'}},'RandomEffectGroups',{'city'},...
        'OptimizerOptions',Options, ...
        'FitMethod', estinfo.FitMethod, ...
        'Verbose', 0);
    temp_m
    
    % for prediction
    new_nobs = size(tract.combofips,1);
    new_ZZ = zeros(new_nobs,1);
    for i=1:1:size(temp_ZZ2,1)
        new_ZZ(tract.combofips==xtval(i),1) = temp_ZZ2(i,2);
    end
    
    new_x = [ones(new_nobs,1), new_ZZ, ...
        log(1+tract.min_dist), new_ZZ.*log(1+tract.min_dist), ...
        log(tract.d2c), ...
        ];
    new_z = {[ones(new_nobs,1), log(1+tract.min_dist)]};
    new_g = tract.combofips;
    
    new_y = predict(temp_m,new_x,new_z,new_g);
    tractX = [tractX,new_y];
    
    eval(['estX_M3.model',num2str(vind),' = temp_m;']);
end
estX_M3.predX = tractX;

% unshrunken M3
estX_M3_unsh =[];
tractX = [];
for vind = 1:1:size(temp_XXX,2)
    
    temp_x = [ones(nobs,1), temp_d, temp_logDC];
    
    temp_x1 = [ones(nobs,1), temp_d]; %vary by city
    temp_x2 = [temp_logDC]; %constant
    temp_y = temp_XXX(:,vind);
    
    % preliminary (partialing out)
    temp_y_res = [];
    temp_x2_res = [];
    for j=1:1:length(xtval)
        temp_sel = xtflag == xtval(j);
        temp_xx = temp_x1(temp_sel,:);
        
        temp_yy = temp_y(temp_sel,:);
        temp_yy_res = temp_yy - temp_xx*( (temp_xx'*temp_xx)\(temp_xx'*temp_yy) );
        
        temp_xx2 = temp_x2(temp_sel,:);
        temp_xx2_res = temp_xx2 - temp_xx*( (temp_xx'*temp_xx)\(temp_xx'*temp_xx2) );
        
        temp_y_res = [temp_y_res; temp_yy_res];
        temp_x2_res = [temp_x2_res; temp_xx2_res];
    end
    unsh_bet_nv = (temp_x2_res'*temp_x2_res)\(temp_x2_res'*temp_y_res);
    
    temp_y_res = temp_y - temp_x2*unsh_bet_nv;
    
    % unshrunken estimator
    unsh_bet = [];
    unsh_xtval = [];
    for j=1:1:length(xtval)
        temp_sel = xtflag == xtval(j);
        temp_xx = temp_x1(temp_sel,:);
        temp_yy = temp_y_res(temp_sel,:);
        temp_bet = (temp_xx'*temp_xx)\(temp_xx'*temp_yy);        
        unsh_bet = [unsh_bet; [temp_bet', unsh_bet_nv]];
        unsh_xtval = [unsh_xtval; xtval(j)];
    end
    
    % prediction
    unsh_yy = ones(size(tract.combofips,1),1)*nan;
    for j=1:1:length(xtval)
        temp_sel = tract.combofips == xtval(j);
        new_dd = log(1+tract.min_dist(temp_sel,:));
        new_d2c = log(tract.d2c(temp_sel,:));
        new_xx = [ones(sum(temp_sel),1), new_dd, new_d2c];
        new_yy = new_xx*unsh_bet(j,:)';
        unsh_yy(temp_sel,:) = new_yy;
    end

    % store
    tractX = [tractX, unsh_yy];

    eval(['estX_M3_unsh.bet',num2str(vind),' = unsh_bet;']);
    eval(['estX_M3_unsh.xtval',num2str(vind),' = unsh_xtval;']);
end
estX_M3_unsh.predX = tractX;

%% Post estimation
% save
save('results_estX_shrunken_s3.mat', 'estX_M1', 'estX_M2', 'estX_M3', ...
    'estX_M1_unsh', 'estX_M2_unsh', 'estX_M3_unsh');



